import pandas as pd
import gene_exp_10x
import matplotlib.pyplot as plt
%matplotlib inline
from copy import deepcopy
from clustergrammer2 import net
def umi_norm(df):
    """Scale every column of ``df`` so that its values sum to one.

    Each column is divided by its own total (presumably the per-barcode
    UMI count -- confirm against the caller's matrix orientation).

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric matrix; its columns are normalized independently.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns as ``df``. The input
        is not modified.

    Notes
    -----
    A column whose total is zero is divided by zero, so its entries come
    back as NaN (or inf for non-zero entries) rather than raising.
    """
    # DataFrame.sum() reduces over the rows, giving one total per column.
    column_totals = df.sum()
    # DataFrame.div aligns a Series on the column labels by default, so each
    # column is divided by its own total.
    return df.div(column_totals)
# --- Load the per-cell metadata table (tab separated, first column is the index) ---
df_meta = pd.read_csv(
    '../download/atlas/meta.tab',
    sep='\t',
    index_col=0,
)
df_meta.shape

# --- Load the expression matrix through the project helper ---
df_ini = gene_exp_10x.load_gene_exp_to_df(
    '../download/atlas/atlas_data_5k-sum/'
)
df_ini.shape

# Peek at the first few column labels and at the available metadata fields.
df_ini.columns.tolist()[:3]
df_meta.columns.tolist()

# --- Doublet annotation: keep only the entries flagged False ---
ser_doublet = df_meta['doublet']
ser_singlet = ser_doublet[ser_doublet.eq(False)]

# Compare sizes before and after dropping the flagged entries.
print(ser_doublet.shape)
print(ser_singlet.shape)
# Cell-type label for every entry of the metadata table.
ser_ct = df_meta['celltype']

# Count the cell types once and reuse the result for the table and the plot.
ct_counts = ser_ct.value_counts().sort_values(ascending=False)
ct_counts

# `kind` must be passed by keyword: recent pandas versions reject positional
# arguments to Series.plot().
ct_counts.plot(kind='bar', figsize=(15, 5))

# Column labels of the expression matrix. This must be defined before the
# lookup below, otherwise a top-to-bottom run fails with NameError.
cols = df_ini.columns.tolist()

# Spot-check label lookups: a hard-coded name and the first matrix column.
ser_ct['cell_1']
ser_ct[cols[0]]

# Build (name, 'Cell Type: <type>') tuples so each column carries its
# cell-type category alongside the original label.
new_cols = [(x, 'Cell Type: ' + str(ser_ct[x])) for x in cols]
print(new_cols[0])

# Work on a copy so the originally loaded matrix keeps its plain labels.
df_cat = deepcopy(df_ini)
df_cat.columns = new_cols
df_cat.columns.tolist()[:5]
# Row-wise totals of the annotated matrix (inspected for shape only).
gene_sum = df_cat.sum(axis=1)
gene_sum.shape

# Restrict to the first `num_cells` columns, then normalize each column
# by its own total.
num_cells = 10000
df_subset = df_cat.iloc[:, :num_cells]
df_small = umi_norm(df_subset)
df_small.shape

# Hand the normalized matrix to clustergrammer2 and prepare it for display:
# keep the 250 rows ranked highest by variance, z-score the rows, and clip
# the values to the range [-5, 5].
net.load_df(df_small)
net.filter_N_top(inst_rc='row', N_top=250, rank_type='var')
net.normalize(axis='row', norm_type='zscore')
net.clip(-5, 5)

# Reload the processed matrix rounded to two decimals before rendering.
df_rounded = net.export_df().round(2)
net.load_df(df_rounded)
net.widget()